{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas处理重复值和异常值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1、删除重复行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_df(indexs, columns):\n",
    "    data = [[str(j)+str(i) for j in columns] for i in indexs]\n",
    "    df = pd.DataFrame(data=data, index=indexs, columns=columns)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 使用duplicated()函数检测重复的行\n",
    "    - 返回元素为布尔类型的Series对象\n",
    "    - 每个元素对应一行，如果该行不是第一次出现，则元素为True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C   D\n",
       "1  A1  B1  C1  D1\n",
       "2  A2  B2  C2  D2\n",
       "3  A3  B3  C3  D3\n",
       "4  A4  B4  C4  D4"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = make_df([1, 2, 3, 4], list('ABCD'))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C   D\n",
       "1  A2  B2  C2  D2\n",
       "2  A2  B2  C2  D2\n",
       "3  A3  B3  C3  D3\n",
       "4  A4  B4  C4  D4"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 让第一行和第二行重复\n",
    "df.loc[1] = df.loc[2]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1     True\n",
       "2     True\n",
       "3    False\n",
       "4    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 判断是否和前面的行重复了\n",
    "df.duplicated()\n",
    "\n",
    "df.duplicated(keep='first')  # 保留第一行\n",
    "df.duplicated(keep='last')  # 保留最后一行\n",
    "df.duplicated(keep=False)  # 标记所有重复行，不保留任何一行\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>DDD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C    D\n",
       "1  A2  B2  C2  DDD\n",
       "2  A2  B2  C2   D2\n",
       "3  A3  B3  C3   D3\n",
       "4  A4  B4  C4   D4"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[1, 'D'] = 'DDD'\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    False\n",
       "2     True\n",
       "3    False\n",
       "4    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# subset: 子集\n",
    "df.duplicated(subset=['A','B','C'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 使用drop_duplicates()函数删除重复的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C   D\n",
       "2  A2  B2  C2  D2\n",
       "3  A3  B3  C3  D3\n",
       "4  A4  B4  C4  D4"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates()\n",
    "df.drop_duplicates(subset=['A', 'B', 'C'])\n",
    "df.drop_duplicates(subset=['A', 'B', 'C'], keep='last')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 映射"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "映射的含义：创建一个映射关系列表，把values元素和一个特定的标签或者字符串绑定。\n",
    "\n",
    "包含三种操作：\n",
    "\n",
    "- replace()函数：替换元素\n",
    "- map()函数：新建一列, 最重要\n",
    "- rename()函数：替换索引"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1) replace()函数：替换元素"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用replace()函数，对values进行替换操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI\n",
       "张三       61    23  98  83\n",
       "张三丰      33    57  55  10\n",
       "李白       72    86  83   5\n",
       "杜甫       21     3  91  34"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = ['张三', '张三丰', '李白', '杜甫']\n",
    "columns = ['Python', 'Java', 'H5', \"UI\"]\n",
    "data = np.random.randint(0, 100, size=(4, 4))\n",
    "\n",
    "df = pd.DataFrame(data=data, index=index, columns=columns)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI\n",
       "张三       61    23  98  83\n",
       "张三丰      33    57  55  10\n",
       "李白       72    86  83  50\n",
       "杜甫       21     3  91  34"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 替换元素\n",
    "df.replace({5: 50, 1: 100})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2) map()函数：适合处理某一单独的列"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "map()函数中可以使用lambda函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI\n",
       "张三       61    23  98  83\n",
       "张三丰      33    57  55  10\n",
       "李白       72    86  83   5\n",
       "杜甫       21     3  91  34"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.copy()\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "      <td>610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "      <td>330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "      <td>720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI  Pandas\n",
       "张三       61    23  98  83     610\n",
       "张三丰      33    57  55  10     330\n",
       "李白       72    86  83   5     720\n",
       "杜甫       21     3  91  34     210"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# map一般用在Series数据结构，不能用于DataFrame\n",
    "# df2['Python'].map({16: 160, 12: 120, 17: 170, 60:600})\n",
    "\n",
    "# 将Python的每个人的成绩乘以10\n",
    "df2['Python'].map(lambda x : x * 10)\n",
    "\n",
    "# 新增一列\n",
    "df2['Pandas'] = df2['Python'].map(lambda x : x * 10)\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "      <th>Pandas</th>\n",
       "      <th>Java是否及格</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "      <td>610</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "      <td>330</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "      <td>720</td>\n",
       "      <td>及格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "      <td>210</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI  Pandas Java是否及格\n",
       "张三       61    23  98  83     610      不及格\n",
       "张三丰      33    57  55  10     330      不及格\n",
       "李白       72    86  83   5     720       及格\n",
       "杜甫       21     3  91  34     210      不及格"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 新增一列：判断Java的成绩是否及格\n",
    "df2['Java是否及格'] = df2['Java'].map(lambda n: '及格'  if n>=60 else '不及格')\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "      <th>Pandas</th>\n",
       "      <th>Java是否及格</th>\n",
       "      <th>UI等级</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "      <td>610</td>\n",
       "      <td>不及格</td>\n",
       "      <td>优秀</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "      <td>330</td>\n",
       "      <td>不及格</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "      <td>720</td>\n",
       "      <td>及格</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "      <td>210</td>\n",
       "      <td>不及格</td>\n",
       "      <td>不及格</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI  Pandas Java是否及格 UI等级\n",
       "张三       61    23  98  83     610      不及格   优秀\n",
       "张三丰      33    57  55  10     330      不及格  不及格\n",
       "李白       72    86  83   5     720       及格  不及格\n",
       "杜甫       21     3  91  34     210      不及格  不及格"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用普通函数\n",
    "# 新增一列： 判断UI成绩\n",
    "#  <60  不及格\n",
    "#  60<=n <80 及格\n",
    "#  >=80 优秀\n",
    "\n",
    "def fn(n):\n",
    "    if n < 60:\n",
    "        return '不及格'\n",
    "    elif n < 80:\n",
    "        return '及格'\n",
    "    return '优秀'\n",
    "\n",
    "df2['UI等级'] = df2['UI'].map(fn)\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 3) rename()函数：替换索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Python  Java  H5  UI\n",
       "张三       61    23  98  83\n",
       "张三丰      33    57  55  10\n",
       "李白       72    86  83   5\n",
       "杜甫       21     3  91  34"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = df.copy()\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>派森</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>张三</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张三丰</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>李白</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杜甫</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     派森  Java  H5  UI\n",
       "张三   61    23  98  83\n",
       "张三丰  33    57  55  10\n",
       "李白   72    86  83   5\n",
       "杜甫   21     3  91  34"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3.rename({'张三': 'Mr Zhang'})  # 默认修改行索引名\n",
    "df3.rename({'Python': '派森'}, axis=1)  # 修改列索引名\n",
    "\n",
    "df3.rename(index={'张三': 'Mr Zhang'}) # 修改行索引名\n",
    "df3.rename(columns={'Python': '派森'}) # 修改列索引名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>H5</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>张三</td>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>98</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>张三丰</td>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>55</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>李白</td>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>83</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>杜甫</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  index  Python  Java  H5  UI\n",
       "0    张三      61    23  98  83\n",
       "1   张三丰      33    57  55  10\n",
       "2    李白      72    86  83   5\n",
       "3    杜甫      21     3  91  34"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 重置索引\n",
    "df3.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Java</th>\n",
       "      <th>UI</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>H5</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>61</td>\n",
       "      <td>23</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>33</td>\n",
       "      <td>57</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>72</td>\n",
       "      <td>86</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Python  Java  UI\n",
       "H5                  \n",
       "98      61    23  83\n",
       "55      33    57  10\n",
       "83      72    86   5\n",
       "91      21     3  34"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 设置行索引\n",
    "df3.set_index(keys=['H5'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 4) apply()函数：既支持 Series，也支持 DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       1      9       6\n",
       "B       1      5       8\n",
       "C       5      7       0\n",
       "D       5      2       1\n",
       "E       0      7       3"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(data=np.random.randint(0, 10, size=(5, 3)),\n",
    "                              index=list('ABCDE'),\n",
    "                              columns=['Python', 'NumPy', 'Pandas']\n",
    "                 )\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    False\n",
       "B    False\n",
       "C    False\n",
       "D    False\n",
       "E    False\n",
       "Name: Python, dtype: bool"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 用于Series, 其中x表式的Series中元素\n",
    "df['Python'].apply(lambda x:  True if x>5 else False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    5.333333\n",
       "B    4.666667\n",
       "C    4.000000\n",
       "D    2.666667\n",
       "E    3.333333\n",
       "dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 用于DataFrame, 其中x是DataFrame中某列或某行的Series数据\n",
    "df.apply(lambda x : x.mean(), axis=0)  # 求每一列数据的平均值\n",
    "df.apply(lambda x : x.mean(), axis=1)  # 求每一行数据的平均值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    (5.3, 3)\n",
       "B    (4.7, 3)\n",
       "C    (4.0, 3)\n",
       "D    (2.7, 3)\n",
       "E    (3.3, 3)\n",
       "dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 自定义方法\n",
    "def fn2(x):\n",
    "    return  (np.round(x.mean(), 1), x.count())  # 平均值，计数\n",
    "\n",
    "df.apply(fn2, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99.538744</td>\n",
       "      <td>100.166451</td>\n",
       "      <td>100.408078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99.576486</td>\n",
       "      <td>101.517029</td>\n",
       "      <td>99.853246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100.676325</td>\n",
       "      <td>98.893773</td>\n",
       "      <td>99.739946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99.612123</td>\n",
       "      <td>99.760570</td>\n",
       "      <td>99.075375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>98.193969</td>\n",
       "      <td>100.017728</td>\n",
       "      <td>100.867650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>100.837929</td>\n",
       "      <td>99.433272</td>\n",
       "      <td>100.605699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>100.772187</td>\n",
       "      <td>101.423714</td>\n",
       "      <td>99.833738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>100.299821</td>\n",
       "      <td>100.029718</td>\n",
       "      <td>100.717434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>100.676278</td>\n",
       "      <td>99.808378</td>\n",
       "      <td>101.047780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>100.390665</td>\n",
       "      <td>101.573158</td>\n",
       "      <td>100.960448</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               0           1           2\n",
       "0      99.538744  100.166451  100.408078\n",
       "1      99.576486  101.517029   99.853246\n",
       "2     100.676325   98.893773   99.739946\n",
       "3      99.612123   99.760570   99.075375\n",
       "4      98.193969  100.017728  100.867650\n",
       "...          ...         ...         ...\n",
       "9995  100.837929   99.433272  100.605699\n",
       "9996  100.772187  101.423714   99.833738\n",
       "9997  100.299821  100.029718  100.717434\n",
       "9998  100.676278   99.808378  101.047780\n",
       "9999  100.390665  101.573158  100.960448\n",
       "\n",
       "[10000 rows x 3 columns]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# applymap: DataFrame专有的方法，其中的x是每个元素\n",
    "df.map(lambda x : x + 100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 5) transform()函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(data=np.random.randint(0, 10, size=(5, 3)),\n",
    "                              index=list('ABCDE'),\n",
    "                              columns=['Python', 'NumPy', 'Pandas']\n",
    "                 )\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>-30</td>\n",
       "      <td>-50</td>\n",
       "      <td>-60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>80</td>\n",
       "      <td>50</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>80</td>\n",
       "      <td>70</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>-40</td>\n",
       "      <td>-60</td>\n",
       "      <td>-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>-30</td>\n",
       "      <td>-10</td>\n",
       "      <td>-10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A     -30    -50     -60\n",
       "B      80     50      60\n",
       "C      80     70      30\n",
       "D     -40    -60     -30\n",
       "E     -30    -10     -10"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series中使用transform\n",
    "# 可以执行多项计算\n",
    "df['Python'].transform([np.sqrt, np.exp])\n",
    "\n",
    "# DataFrame中使用transform\n",
    "def convert(x):\n",
    "    if x.mean() > 5:\n",
    "        return x * 10\n",
    "    return x * (-10)\n",
    "\n",
    "df.transform(convert)  # 处理每一列\n",
    "df.transform(convert, axis=1)  # 处理每一行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. 异常值检测和过滤"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- describe(): 查看每一列的描述性统计量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>1%</th>\n",
       "      <th>30%</th>\n",
       "      <th>40%</th>\n",
       "      <th>50%</th>\n",
       "      <th>90%</th>\n",
       "      <th>99%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Python</th>\n",
       "      <td>5.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.588436</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.00</td>\n",
       "      <td>3.2</td>\n",
       "      <td>3.6</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.00</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NumPy</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.8</td>\n",
       "      <td>2.280351</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.16</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>6.96</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pandas</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>2.167948</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.08</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.00</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        count  mean       std  min    1%  30%  40%  50%  90%   99%  max\n",
       "Python    5.0   5.2  2.588436  3.0  3.00  3.2  3.6  4.0  8.0  8.00  8.0\n",
       "NumPy     5.0   4.8  2.280351  1.0  1.16  5.0  5.0  5.0  6.6  6.96  7.0\n",
       "Pandas    5.0   3.8  2.167948  1.0  1.08  3.0  3.0  3.0  6.0  6.00  6.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()\n",
    "df.describe([0.01, 0.3, 0.4, 0.9, 0.99])\n",
    "df.describe([0.01, 0.3, 0.4, 0.9, 0.99]).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- df.std() : 可以求得DataFrame对象每一列的标准差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Python    2.588436\n",
       "NumPy     2.280351\n",
       "Pandas    2.167948\n",
       "dtype: float64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.std()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- df.drop(): 删除特定索引 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.copy()\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.drop('A')  # 默认删除行\n",
    "df2.drop('Python', axis=1)  # 删除列\n",
    "\n",
    "df2.drop(index='A')  # 删除行\n",
    "df2.drop(columns='Python')  # 删除列\n",
    "\n",
    "# 删除多列或多行\n",
    "df2.drop(columns=['NumPy', 'Python'])\n",
    "df2.drop(index=['A', 'B'], inplace=True)\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- unique() : 唯一,去重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 8, 4], dtype=int32)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame没有unique，Series调用unique\n",
    "df['Python'].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- df.query : 按条件查询"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ==， >, <\n",
    "# and, &\n",
    "#  or ， |\n",
    "# in\n",
    "df.query('Python == 9')  # 找到Python列中等于9的所有行\n",
    "df.query('Python < 8')\n",
    "\n",
    "df.query('Python>6 and NumPy==2')\n",
    "df.query('Python>6 & NumPy==2')\n",
    "\n",
    "df.query('Python==3 or NumPy==2')\n",
    "df.query('Python==3 | NumPy==2')\n",
    "\n",
    "df.query('Python in [3, 4, 5, 6]')  # 成员运算符\n",
    "\n",
    "# 使用变量\n",
    "n = 7\n",
    "df.query('Python == @n')  # @n 表式使用变量n的值\n",
    "m =  [3, 4, 5, 6]\n",
    "df.query('Python in @m')  # 成员运算符"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "- df.sort_values(): 根据值排序\n",
    "- df.sort_index(): 根据索引排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "      <th>Python</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NumPy  Pandas  Python\n",
       "A      5       6       3\n",
       "B      5       6       8\n",
       "C      7       3       8\n",
       "D      6       3       4\n",
       "E      1       1       3"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  sort_values : 默认按照列名排序，默认升序  （常用）\n",
    "df.sort_values('Python')\n",
    "# ascending: 是否升序，默认是True\n",
    "df.sort_values('Python', ascending=False)  # 降序\n",
    "\n",
    "# 根据行索引名排序，会把列进行排序（不常用）\n",
    "df.sort_values('B', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "A       3      5       6\n",
       "B       8      5       6\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>Pandas</th>\n",
       "      <th>NumPy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  Pandas  NumPy\n",
       "A       3       6      5\n",
       "B       8       6      5\n",
       "C       8       3      7\n",
       "D       4       3      6\n",
       "E       3       1      1"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按照索引名排序 （不常用）\n",
    "# 默认是对行索引进行排序，默认是升序\n",
    "df.sort_index(ascending=False)\n",
    "# 按照列索引排序\n",
    "df.sort_index(ascending=False, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- df.info(): 查看数据信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 5 entries, A to E\n",
      "Data columns (total 3 columns):\n",
      " #   Column  Non-Null Count  Dtype\n",
      "---  ------  --------------  -----\n",
      " 0   Python  5 non-null      int32\n",
      " 1   NumPy   5 non-null      int32\n",
      " 2   Pandas  5 non-null      int32\n",
      "dtypes: int32(3)\n",
      "memory usage: 272.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "# info: 常用\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "============================================\n",
    "\n",
    "练习：\n",
    "\n",
    "    新建一个形状为10000*3的标准正态分布的DataFrame(np.random.randn)，去除掉所有满足以下情况的行：其中任一元素绝对值大于3倍标准差\n",
    "\n",
    "============================================"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.461256</td>\n",
       "      <td>0.166451</td>\n",
       "      <td>0.408078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.423514</td>\n",
       "      <td>1.517029</td>\n",
       "      <td>-0.146754</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.676325</td>\n",
       "      <td>-1.106227</td>\n",
       "      <td>-0.260054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.387877</td>\n",
       "      <td>-0.239430</td>\n",
       "      <td>-0.924625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.806031</td>\n",
       "      <td>0.017728</td>\n",
       "      <td>0.867650</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0 -0.461256  0.166451  0.408078\n",
       "1 -0.423514  1.517029 -0.146754\n",
       "2  0.676325 -1.106227 -0.260054\n",
       "3 -0.387877 -0.239430 -0.924625\n",
       "4 -1.806031  0.017728  0.867650"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.461256</td>\n",
       "      <td>0.166451</td>\n",
       "      <td>0.408078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.423514</td>\n",
       "      <td>1.517029</td>\n",
       "      <td>-0.146754</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.676325</td>\n",
       "      <td>-1.106227</td>\n",
       "      <td>-0.260054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.387877</td>\n",
       "      <td>-0.239430</td>\n",
       "      <td>-0.924625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.806031</td>\n",
       "      <td>0.017728</td>\n",
       "      <td>0.867650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>0.837929</td>\n",
       "      <td>-0.566728</td>\n",
       "      <td>0.605699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>0.772187</td>\n",
       "      <td>1.423714</td>\n",
       "      <td>-0.166262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>0.299821</td>\n",
       "      <td>0.029718</td>\n",
       "      <td>0.717434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>0.676278</td>\n",
       "      <td>-0.191622</td>\n",
       "      <td>1.047780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>0.390665</td>\n",
       "      <td>1.573158</td>\n",
       "      <td>0.960448</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9906 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             0         1         2\n",
       "0    -0.461256  0.166451  0.408078\n",
       "1    -0.423514  1.517029 -0.146754\n",
       "2     0.676325 -1.106227 -0.260054\n",
       "3    -0.387877 -0.239430 -0.924625\n",
       "4    -1.806031  0.017728  0.867650\n",
       "...        ...       ...       ...\n",
       "9995  0.837929 -0.566728  0.605699\n",
       "9996  0.772187  1.423714 -0.166262\n",
       "9997  0.299821  0.029718  0.717434\n",
       "9998  0.676278 -0.191622  1.047780\n",
       "9999  0.390665  1.573158  0.960448\n",
       "\n",
       "[9906 rows x 3 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(np.random.randn(10000, 3))\n",
    "display(df.head())\n",
    "\n",
    "# 过滤掉 大于3倍标准差的行\n",
    "# 标准差 df.std()\n",
    "# 绝对值 df.abs()\n",
    "\n",
    "# cond：找到每一个元素是否大于3倍标准差\n",
    "cond = df.abs() > df.std() *3\n",
    "cond\n",
    "\n",
    "# 找到存在大于3倍标准差的行\n",
    "cond2 = cond.any(axis=1)\n",
    "cond2\n",
    "# cond2.sum()\n",
    "\n",
    "# bool值索引，过滤异常值(大于3倍标准差)\n",
    "df.loc[~cond2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. 抽样"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 使用.take()函数排序\n",
    "- 可以借助np.random.permutation()函数随机排序"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "无放回抽样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "C       8      7       3\n",
       "D       4      6       3\n",
       "E       3      1       1"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "E       3      1       1\n",
       "D       4      6       3\n",
       "C       8      7       3"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.take([1, 0, 2])  # 行排列\n",
    "df2.take([1, 0, 2], axis=1)  # 列排列\n",
    "\n",
    "# 随机排列\n",
    "np.random.permutation([0, 1, 2])\n",
    "\n",
    "# 无放回抽样: 依次随机取出，没有重复值\n",
    "df2.take(np.random.permutation([0, 1, 2]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "有放回抽样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Python</th>\n",
       "      <th>NumPy</th>\n",
       "      <th>Pandas</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Python  NumPy  Pandas\n",
       "D       4      6       3\n",
       "D       4      6       3\n",
       "E       3      1       1\n",
       "D       4      6       3\n",
       "C       8      7       3"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 有放回抽样： 可能会出现重复值\n",
    "\n",
    "np.random.randint(0, 3, size=5)\n",
    "\n",
    "df2.take(np.random.randint(0, 3, size=5))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
